* Collapse turbine database into plant-level data

* plant IDs and plant names are in the public domain as part of the US Wind Turbine Database.

* read turbine data
use "/projects/users/########/Snapshot2022/RawOutsideData/uswtdb_v5_3_w_p_id.dta", clear
	
* check out average turbine/plant time series patterns
preserve 
collapse p_tnum p_cap t_cap t_hh, by(p_year)

tsset p_year

* average turbine number per plant stable since mid-90s and especially early 2000s
tsline p_tnum

* average plant capacity increasing since mid-90s
tsline p_cap

* average individual turbine capacity increasing since mid-90s
tsline t_cap

* average turbine height increasing since 1989
tsline t_hh

restore

* clean some errant turbine records
keep if substr(p_name,1,7) != "unknown"
keep if p_year >=1995 & p_year !=.
keep if p_year < 2022
keep if !missing(p_cap)
keep if t_state!="AK" & t_state!="PR" & t_state!="GU" & t_state != "HI"
keep if p_tnum < 700 & p_tnum > 7 & p_cap >5 

sort p_id eia_id p_year t_state p_name
* some manual ID assignment public records
replace p_id = 700220 if p_name == "Karen Avenue Windfarm" & missing(p_id)
replace p_year = 2003 if p_name == "Karen Avenue Windfarm" 
* San Gorgonio Farms Wind Farm has p_year = 2008 and 2015. treat as separate.
* browse eia_id t_state t_fips p_name p_year p_tnum p_cap t_cap xlong ylat p_id if strpos(p_name, "Gorgonio") 
replace p_id = 900001 if p_name == "San Gorgonio Farms Wind Farm" & p_year == 2008
replace p_id = 900002 if p_name == "San Gorgonio Farms Wind Farm" & missing(p_id)
replace p_year = 2015 if p_name == "San Gorgonio Farms Wind Farm" & p_id == 9000002
* Blue Summit II
replace p_id = 701607 if p_name == "Blue Summit II" & missing(p_id)
* TB Flats I & II
replace p_id = 900003 if p_name == "TB Flats I & II" & missing(p_id)
* Mesquite Sky 
replace p_id = 900004 if p_name == "Mesquite Sky" & missing(p_id)
* King Plains
replace p_id = 900005 if p_name == "King Plains" & missing(p_id)
* Point Wind Plant
replace p_id = 900006 if p_name == "Point Wind Plant" & missing(p_id)
* Maverick
replace p_id = 701847 if p_name == "Maverick Wind" & missing(p_id)
* La Chalupa
replace p_id = 900007 if p_name == "La Chalupa" & missing(p_id)
* Iron Star
replace p_id = 900008 if p_name == "Iron Star" & missing(p_id)
* Black Rock
replace p_id = 900009 if p_name == "Black Rock" & missing(p_id)
* Las Majadas
replace p_id = 701809 if p_name == "Las Majadas" & missing(p_id) & p_year == 2021
* Painted Hills Repower 
replace p_id = 900010 if p_name == "Painted Hills Repower" & missing(p_id)

keep if !missing(p_id)


* collapse to plants

* first, if plants have centroids far from any specific turbine.
* these may be incorrectly grouped turbines - not really plants. 
preserve
collapse p_tnum p_cap t_cap t_hh xlong ylat (firstnm) p_year t_state t_county t_fips p_name, by(p_id)
rename xlong plong
rename ylat plat
save "/projects/users/########/Snapshot2022/IntermediateData/TempData/PlantsTest.dta", replace
restore 
merge m:1 p_id using "/projects/users/########/Snapshot2022/IntermediateData/TempData/PlantsTest.dta"
drop _merge
gen londist = xlong - plong
gen latdist = ylat - plat
* hist londist
* browse if abs(londist)>1
* browse eia_id t_state t_fips p_name p_year p_tnum p_cap t_cap xlong ylat p_id londist latdist if abs(londist)>0.3
drop if abs(londist) > 2
drop if p_id == 701513 & londist > 0.7
drop if p_id == 701513 & latdist > 0.45
replace p_id = 900011 if p_id == 701626 & londist > 0.6
replace p_id = 900012 if p_id == 701860 & latdist > 0.15
drop plong-latdist


preserve
collapse p_tnum p_cap t_cap t_hh xlong ylat (firstnm) p_year t_state t_county t_fips p_name, by(p_id)
sort p_id
save "/projects/users/########/Snapshot2022/IntermediateData/PlantLevelWindDatabase2022.dta", replace
restore

preserve
collapse p_tnum p_cap t_cap t_hh xlong ylat (firstnm) p_year t_state t_county t_fips p_name, by(p_id)
sort p_id
keep if t_state == "AL" | t_state == "AZ" | t_state == "AR" | t_state == "CA" ///
      | t_state == "CO" | t_state == "DE" | t_state == "DC" | t_state == "IL" ///
      | t_state == "IN" | t_state == "IA" | t_state == "KS" | t_state == "ME" ///
      | t_state == "MD" | t_state == "MO" | t_state == "MT" | t_state == "NE" ///
      | t_state == "NV" | t_state == "NM" | t_state == "ND" | t_state == "OK" ///
      | t_state == "TN" | t_state == "VA" | t_state == "WY"
save "/projects/users/########/Snapshot2022/IntermediateData/PlantLevelWindDatabase2022_23.dta", replace
restore

***********************************
* Create county-level capacity panel
***********************************
* collapse to plants, then to county
preserve
collapse p_tnum p_cap t_cap t_hh xlong ylat (firstnm) p_year t_state t_county t_fips p_name, by(p_id)
sort p_id

collapse (sum) c_ptnum = p_tnum c_pcap = p_cap ///
		(firstnm) t_state t_county, by(t_fips p_year)
label var c_ptnum "Num Turbines from Plants Centered in Cnty"
label var c_pcap "Capacity from Plants Centered in Cnty"
save "/projects/users/########/Snapshot2022/IntermediateData/TempData/CountyLevelWindDatabaseP.dta", replace
restore

* get turbine data at county level
preserve
collapse (sum) c_tcap = t_cap (count) c_tnum = t_cap ///
	(mean)  xlong ylat c_tp_cap = p_cap c_tp_tnum = p_tnum ///
	(firstnm) t_state t_county, by(t_fips p_year)
label var c_tnum "Num Turbines in Cnty"
label var c_tcap "Capacity in Cnty"
label var c_tp_tnum "Num Turbines from Plants Partially in Cnty"
label var c_tp_cap "Capacity from Plants Partially in Cnty"
save "/projects/users/########/Snapshot2022/IntermediateData/TempData/CountyLevelWindDatabaseT.dta", replace
restore

* merge all together
use "/projects/users/########/Snapshot2022/IntermediateData/TempData/CountyLevelWindDatabaseT.dta", clear
merge 1:1 t_fips p_year using "/projects/users/########/Snapshot2022/IntermediateData/TempData/CountyLevelWindDatabaseP.dta", ///
	keepusing(c_ptnum c_pcap) 
drop _merge

destring(t_fips), gen(cnty)
gen year = p_year
sort cnty year
xtset cnty year
tsfill, full
recode c_tcap (mis=0)
recode c_tnum (mis=0)
recode c_tp_cap (mis=0)
recode c_tp_tnum (mis=0)
recode c_ptnum (mis=0)
recode c_pcap (mis=0)
drop t_fips
gen t_fips = string(cnty,"%05.0f")
merge m:m t_fips using "/projects/users/########/Snapshot2022/IntermediateData/TempData/CountyLevelWindDatabaseT.dta", ///
	 update nogen keepusing(xlong ylat t_state t_county)
bysort cnty (year): replace c_tcap = sum(c_tcap)
bysort cnty (year): replace c_tnum = sum(c_tnum)
bysort cnty (year): replace c_tp_cap = sum(c_tp_cap)
bysort cnty (year): replace c_tp_tnum = sum(c_tp_tnum)
bysort cnty (year): replace c_ptnum = sum(c_ptnum)
bysort cnty (year): replace c_pcap = sum(c_pcap)
rename t_fips ctyfips
save "/projects/users/########/Snapshot2022/IntermediateData/CountyLevelWindDatabase2022.dta", replace


